library(raster)
## Loading required package: sp
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.5.2
## rgdal: version: 1.4-8, (SVN revision 845)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/gdal
##  GDAL binary built with GEOS: FALSE 
##  Loaded PROJ.4 runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/proj
##  Linking to sp version: 1.3-2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
## 
##     intersect, select, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(ggplot2)
library(pophelper)
## pophelper v2.3.1 ready.
library(scatterpie)
## 
## Attaching package: 'scatterpie'
## The following object is masked from 'package:sp':
## 
##     recenter
library(ggnewscale)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

First plot CV

### parvi

parvi_CV<-read.delim("../data/genetic/output/admixture/parvi/Kerror_parviglumis.txt", 
                    sep=" ", header=FALSE, stringsAsFactors = FALSE) %>%
                    mutate(K = parse_number(V3)) %>% 
                    select(K, V4) %>% rename(CV=V4)

## Plot CV
p <- ggplot(parvi_CV, aes(x=K, y=CV)) + geom_point(size=1) + geom_line()  

p + theme(axis.title.x = element_text(face="bold", size=20),
          axis.text.x  = element_text(angle=90, vjust=0.5, size=16),
          axis.title.y = element_text(face="bold", size=20),
          axis.text.y  = element_text(size=16)) +
  ggtitle(label = "Z. m. parviglumis")

### mexicana

mexi_CV<-read.delim("../data/genetic/output/admixture/mexicana/Kerror_mexicana.txt", 
                    sep=" ", header=FALSE, stringsAsFactors = FALSE) %>%
  mutate(K = parse_number(V3)) %>% 
  select(K, V4) %>% rename(CV=V4)

## Plot CV
p <- ggplot(mexi_CV, aes(x=K, y=CV)) + geom_point(size=1) + geom_line()  

p + theme(axis.title.x = element_text(face="bold", size=20),
          axis.text.x  = element_text(angle=90, vjust=0.5, size=16),
          axis.title.y = element_text(face="bold", size=20),
          axis.text.y  = element_text(size=16)) +
  ggtitle(label = "Z. m. mexicana")

The CV error of the Admixture analysis doesn't show a clear K value, so we chose those K values where the slope changes for each species. For ** Z. m. parviglumis** this is K=13 and K=25, and for Z. m. mexicana K= 5, K=10 and K=20.

Z. m. parviglumis

Load admixture Q data and samples metadata

# get data
parvifiles<-c("../data/genetic/output/admixture/parvi/bytaxa_parviglumis.25.Q",
              "../data/genetic/output/admixture/parvi/bytaxa_parviglumis.13.Q")

parvi_list<- readQ(parvifiles)

# Check
attributes(parvi_list)
## $names
## [1] "bytaxa_parviglumis.25.Q" "bytaxa_parviglumis.13.Q"
# Get metadata
meta<-read.delim("../data/genetic/output/ADN_pasap_3604.txt")
parviPlinksamples<-read.table("../data/genetic/output/bytaxa_parviglumis.fam")
parvimeta<-meta[meta$POBL %in% parviPlinksamples$V2, ]
levels(parvimeta$ESTADO)[13]<-"EdoMex"
levels(parvimeta$ESTADO)[14]<-"Michoacan"

# add sample metadata to qlist
attributes(parvi_list[[1]])$row.names<-as.character(parvimeta$POBL)
attributes(parvi_list[[2]])$row.names<-as.character(parvimeta$POBL)

# drop unused levels
parvimeta<-droplevels(parvimeta)

Plot Qvals of both Ks of interest:

p1 <- plotQ(alignK(parvi_list[1:2]),
            sortind = "all",
            imgoutput="join", returnplot=T, sharedindlab=FALSE,
            exportplot=F,basesize=11)
grid.arrange(p1$plot[[1]])

Plot only K=13 ordering by all genetic clusters

p1<-plotQ(parvi_list[2],
            returnplot=T,
            exportplot=F,
            sortind = "all", basesize = 11,
           # showindlab= TRUE, useindlab=TRUE, indlabangle=90, indlabsize=0.1,
      exportpath = "../data/genetic/output/admixture/parvi")
plot(p1$plot[[1]])

Scatterpies of admixture groups

# Read raw Qval file
Qval<-read.table(paste0("../data/genetic/output/admixture/parvi/bytaxa_parviglumis.13.Q"))
names(Qval)<-paste0("K", 1:ncol(Qval))

# add metadata
Qval<-cbind(parvimeta$POB, parvimeta$INDIV, parvimeta$LONGITUDE, parvimeta$LATITUDE, Qval)
names(Qval)[1:4]<-c("POB", "INDIV", "LONGITUDE", "LATITUDE") 

# generate same colors than the ones used for admixture plot
piecolors<-gplots::rich.colors(13)

# plot scatterpie
ppie1<- ggplot() + geom_scatterpie(data=Qval,
                           aes(x=LONGITUDE, 
                               y=LATITUDE,
                               group=POB),
                               color=NA,
                               cols=paste0("K", 1:13)) +
 scale_fill_manual(values= piecolors,
                    name="Genetic cluster") + theme_bw()
  
ppie1

Adding populations names:

# plot scatterpie
ppie1 + geom_text(data=parvimeta,
                      aes(x=parvimeta$LONGITUDE,
                          y=parvimeta$LATITUDE,
                          label=POB),
                      color="grey40",
                     check_overlap = T,
                      hjust = 0, vjust=1, nudge_x = 0.0005,
                 size= 5.5)
## Warning: Use of `parvimeta$LONGITUDE` is discouraged. Use `LONGITUDE` instead.
## Warning: Use of `parvimeta$LATITUDE` is discouraged. Use `LATITUDE` instead.

Plot ordering populations in geographical order

# Get latitudinal order
df<-data.frame(POB=parvimeta$POB, LONGITUDE=parvimeta$LONGITUDE) %>%
    unique() %>%
    arrange(LONGITUDE)
unique(df$POB)
##  [1] BVPU BGUA BQUE BEJU BMAN BSAU BTAR BTAC BNOC BRED BCAR BHUE BTZI BTIQ BTUZ
## [16] BJUA BTAL BPCH BZUL BOTZ BVBR BTEJ BTEL BZAC BAPX BIXC BTCT BMAZ MALI BCOL
## [31] BHUI BMOR BOLI BOAX
## 34 Levels: BAPX BCAR BCOL BEJU BGUA BHUE BHUI BIXC BJUA BMAN BMAZ BMOR ... MALI
# set levels of Pops in latitudinal order
desired_order<-unique(df$POB)
parvimeta$POBorder<-factor(parvimeta$POB, levels = desired_order)

# change levels so that they sort in that order alphabetically
levels(parvimeta$POBorder)<-paste0(1:34, levels(parvimeta$POBorder))
levels(parvimeta$POBorder)[1:9]<-paste0("0", levels(parvimeta$POBorder)[1:9])

# set labels
labset2<-data.frame(POBorder=as.character(parvimeta$POBorder), 
                    ESTADO=as.character(parvimeta$ESTADO), stringsAsFactors = FALSE)

#plot
p<-plotQ(parvi_list[2],
            returnplot=T,
            exportplot=F,
            sortind = "all",
      grplab=labset2, 
      ordergrp=TRUE, 
      grplabangle= 45, grplabsize=10, linesize=2, divsize = 2, grplabheight= 2, grplabpos=0.5,
      exportpath = "../data/genetic/output/admixture/parvi")
plot(p$plot[[1]])

Extract in which PGD do the sampled individuals fall. This would be used to then add this information to the plot and see how well PGD correspond to genetic clusters.

First load raster data of Zea mays spp parviglumis SDM and the PGD that fell within it:

# path to rasters
my_rasters<-c("../data/spatial/modelosDarwinZea/Zea_mays_parviglumis.tif", # SDM 
              "../data/spatial/areasProxyDivGen/crop_to_sp_final/PDG_Zea_mays_parviglumis.tif") #SDM subdivied by Proxies of gen div

# stack rasters and add nicer name
parvi_rasters<-stack(my_rasters)

# check
names(parvi_rasters)
## [1] "Zea_mays_parviglumis"     "PDG_Zea_mays_parviglumis"
# Check projection
proj4string(parvi_rasters$PDG_Zea_mays_parviglumis)
## [1] "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"
# change projection to longlat for nicer visualization
newcrs <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
parvi_rasters<-projectRaster(parvi_rasters, crs=newcrs, method="ngb")
parvi_rasters<-stack(parvi_rasters)

Z. m. parviglumis has 29 PGD distributed like this:

# Get number of cells of each PDG
table(getValues(parvi_rasters$PDG_Zea_mays_parviglumis))
## 
##       0       2       5       6       7       8      10      11      15      20 
## 2242616     395    8786    6753       3    9641     425    2954    5340    1025 
##      21      23      25      32      36      37      38      39      40      41 
##    1070      92    2237     359    7557   31658    1731     393    4462     201 
##      42      43      48      58      84      86      90      92      93 
##       4    8556    5913      34    1290       6     501     239      50

Plot PGD and sampling points of genetic data.

# crop raster to make it more managable
parvi_small<- raster::crop(parvi_rasters$PDG_Zea_mays_parviglumis,
                           y=extent(c(-107, -96 , 15.5, 22)))

# convert to df to plot with ggplot
raster_df<-as.data.frame(rasterToPoints(parvi_small, spatial = TRUE))
raster_df$PDG_Zea_mays_parviglumis<-as.character(raster_df$PDG_Zea_mays_parviglumis)

# get nice colors
mycols<-c("grey40","#917031", "#4d6cd0", "#8bbb39", "#c556c5", "#53d06a", "#895dc8", "#42a331", "#d84492", "#69b974", "#d63c48", "#4dc0b1", "#d3542b", "#4b9cd3", "#d2a337", "#9289cf", "#b1b23a", "#9a5398", "#498331", "#cc4668", "#37835d", "#df89be", "#6e7721", "#9b496e", "#b9b06d", "#ac575b", "#65743a", "#e78b7a", "#db873d", "#a2552d")


# plot PGD raster
p<- ggplot() +
    geom_raster(data = raster_df , aes(x = x, y = y, fill = PDG_Zea_mays_parviglumis)) +
    scale_fill_manual(values= alpha(mycols, 0.5) , name="PGD") + 
    theme_bw() 

# add sampling points
p + geom_point(data=parvimeta, aes(x=LONGITUDE, 
                                   y=LATITUDE))
## Warning: Raster pixels are placed at uneven horizontal intervals and will be
## shifted. Consider using geom_tile() instead.

Plot scatterpies with genetic groups

p_piemap<-p + new_scale("fill") + # this is needed because raster and pie have diff fills
          geom_scatterpie(data=Qval,
                                   aes(x=LONGITUDE, 
                                       y=LATITUDE,
                                       group=POB),
                                       color="NA",
                                       cols=paste0("K", 1:13)) +
         scale_fill_manual(values= piecolors,
                            name="Genetic cluster") +
          labs(x="", y= "") + theme(text = element_text(size = 25))
p_piemap
## Warning: Raster pixels are placed at uneven horizontal intervals and will be
## shifted. Consider using geom_tile() instead.

Extract in which PGD each genetic sample falls:

# Create function to estimate the mode, omitting na values
Mode <- function(x) {
  ux <- unique(na.omit(x))
  ux[which.max(tabulate(match(x, ux)))]
}

# transform 0 value (out of sp SDM range) to NA so that only PGD are considered
parvi_rasters$PDG_Zea_mays_parviglumis<-reclassify(parvi_rasters$PDG_Zea_mays_parviglumis, 
                                                   rcl=c(0, .1, NA), include.lowest=TRUE)

# extract the PDG most frequent in a given buffer radio of the sampling point
# ignoring NA values (see Mode fun)
PGD_points<-raster::extract(parvi_rasters$PDG_Zea_mays_parviglumis,
                            buffer=2000, fun=Mode, # so that no points fell in only NAs
                    y=data.frame(long=parvimeta$LONGITUDE,
                                 lat=parvimeta$LATITUDE)) %>%
            as.character()
PGD_points<-parvimeta %>% select(LONGITUDE, LATITUDE, DNASample, POB, POBorder) %>%
                  mutate(PGD_points=PGD_points) 

table(PGD_points$PGD_points)
## 
##  10  11  15  36  37  40  41  43  48   5   6   8  84 
##  37  44  12 126 685  84  42 288  37 205 123  55   5
# check if there are still NA
sum(is.na(PGD_points$PGD_points)) 
## [1] 26
PGD_points[is.na(PGD_points$PGD_points), ]
##      LONGITUDE LATITUDE DNASample  POB POBorder PGD_points
## 1078 -99.28528 16.98056    186_10 BTCT   27BTCT       <NA>
## 1079 -99.28528 16.98056    186_11 BTCT   27BTCT       <NA>
## 1080 -99.28528 16.98056    186_12 BTCT   27BTCT       <NA>
## 1081 -99.28528 16.98056    186_13 BTCT   27BTCT       <NA>
## 1082 -99.28528 16.98056    186_14 BTCT   27BTCT       <NA>
## 1083 -99.28528 16.98056    186_16 BTCT   27BTCT       <NA>
## 1084 -99.28528 16.98056    186_17 BTCT   27BTCT       <NA>
## 1085 -99.28528 16.98056    186_18 BTCT   27BTCT       <NA>
## 1086 -99.28528 16.98056    186_19 BTCT   27BTCT       <NA>
## 1087 -99.28528 16.98056     186_1 BTCT   27BTCT       <NA>
## 1088 -99.28528 16.98056    186_20 BTCT   27BTCT       <NA>
## 1089 -99.28528 16.98056    186_21 BTCT   27BTCT       <NA>
## 1090 -99.28528 16.98056    186_22 BTCT   27BTCT       <NA>
## 1091 -99.28528 16.98056    186_23 BTCT   27BTCT       <NA>
## 1092 -99.28528 16.98056    186_24 BTCT   27BTCT       <NA>
## 1093 -99.28528 16.98056    186_25 BTCT   27BTCT       <NA>
## 1094 -99.28528 16.98056    186_26 BTCT   27BTCT       <NA>
## 1095 -99.28528 16.98056    186_27 BTCT   27BTCT       <NA>
## 1096 -99.28528 16.98056    186_28 BTCT   27BTCT       <NA>
## 1097 -99.28528 16.98056    186_29 BTCT   27BTCT       <NA>
## 1098 -99.28528 16.98056     186_2 BTCT   27BTCT       <NA>
## 1099 -99.28528 16.98056    186_30 BTCT   27BTCT       <NA>
## 1100 -99.28528 16.98056     186_4 BTCT   27BTCT       <NA>
## 1101 -99.28528 16.98056     186_5 BTCT   27BTCT       <NA>
## 1102 -99.28528 16.98056     186_7 BTCT   27BTCT       <NA>
## 1103 -99.28528 16.98056     186_9 BTCT   27BTCT       <NA>
# assing them to closest PGD
PGD_points[is.na(PGD_points$PGD_points), ]$PGD_points <-48
table(PGD_points$PGD_points)
## 
##  10  11  15  36  37  40  41  43  48   5   6   8  84 
##  37  44  12 126 685  84  42 288  63 205 123  55   5

Use this information to see in which genetic group our PGD fall:

# set levels of PGD more or less in lontigitudinal order
desired_order<-c("48", "36", "10", "5", "15", "11" , "37", "6", "84", "43", "8", "40", "41")

PGD_points$PGD_order<-factor(PGD_points$PGD_points, levels = desired_order)

# change levels so that they sort in that order alphabetically
levels(PGD_points$PGD_order)<-paste0(1:13, "_",levels(PGD_points$PGD_order))
levels(PGD_points$PGD_order)[1:9]<-paste0("0", levels(PGD_points$PGD_order)[1:9])

# set labels
labsetPGD<-data.frame(PGD_points=as.character(PGD_points$PGD_order),
                      POBorder=as.character(PGD_points$POBorder),
                      stringsAsFactors = FALSE)

p3<-plotQ(parvi_list[2],
            returnplot=T,
            exportplot=T,
            sortind = "all",
      grplab=labsetPGD, selgrp = "PGD_points",
      ordergrp=T, 
      grplabangle= 90, grplabsize=8, linesize=3, divsize = 3.5,
      exportpath = getwd(), height = 20, width = 80)
## Drawing plot ...
## /Volumes/datos/USUARIOS/DarwinUICN/Zonificacion/Zonas_de_vida/analisisUniCons_proxiGen/bin/bytaxa_parviglumis.13.Q.png exported.
plot(p3$plot[[1]])

Omit labels for plot for the paper: